// TODO: optionally find and remove headers that match the signature but don't have a corresponding subdirectory
// TODO: exclusions should apply to generated headers as well???
#include <algorithm>
#include <exception>
#include <filesystem>
#include <iostream>
#include <string>
#include <system_error>
#include <vector>

#include "simple/file.hpp" // ropen TODO: remove

#include "simple/io/open.h" // open mode no_such_entity interface operation
#include "simple/io/read_iterator.h" // read_iterator
#include "simple/io/write.h" // write as_byte_range
#include "simple/io/seek.h" // seek
#include "simple/io/resize.h" // resize
#include "simple/support/algorithm.hpp" // set_difference, copy, transform, find, mismatch
#include "simple/support/tuple_utils.hpp" // transform
#include "simple/support/function_utils.hpp" // overload transform_arg disjunction_f
#include "simple/support/iterator.hpp" // *
#include "simple/support/misc.hpp" // to_

namespace fs = std::filesystem;
using namespace std::literals;
using namespace simple;
using namespace io;
using namespace support;

// Log verbosity, set by main from the command line:
// 0 (-q) nothing, 1 (default) warnings and file changes, 2 (-v) progress,
// 3 (-vv) individual processing steps, 4 (-vvv) dumps of intermediate lists.
int debug = 1;

// Optional configuration file, looked up by main relative to the current
// directory; each of its lines is added to the excluded headers.
const char * exclude_filename = ".allinclude_exclude";

// Header extensions that are recognized and generated.
// main asserts that this stays sorted, and the has_includes flags are matched
// with it element by element.
const auto types = std::array{".h"s, ".hpp"s};

// First line of every generated header; a non-empty file on disk that does not
// start with it is not treated as an allinclude file.
const auto signature =  "// this is an auto-generated allinclude\n"s;

// What was found out about one directory while scanning it.
// Stays an aggregate: it is created from just the directory path, the other
// members start out empty / all false.
struct include_info
{
	// the scanned directory
	fs::path directory;
	// headers that the allinclude files of the directory are going to list
	std::vector<fs::path> includes{};
	// for each header type: whether includes holds a header of that type
	std::array<bool, 2> has_includes{};
};

// One entry per scanned directory, appended by generate_allinclude_info and
// consumed by write_allincludes.
std::vector<include_info> allincludes;
// Headers to leave out of the include lists; filled by main from the -x
// arguments and the exclude configuration file.
std::vector<fs::path> excluded_headers;

// Recursively collects the headers that the allinclude files of `directory`
// should include and records the result in `allincludes`.
//
// For one directory this
//  1. gathers its subdirectories and its headers (extension listed in `types`)
//     that are not excluded,
//  2. ignores subdirectories whose name has an extension (with a warning),
//  3. drops headers that share their stem with a remaining subdirectory, since
//     those are the allinclude files of that subdirectory,
//  4. recurses into every remaining subdirectory and adds
//     "<subdirectory><extension>" for each header type the subdirectory
//     reports includes for,
//  5. appends the final list to `allincludes`, so subdirectories end up in
//     there before their parent.
//
// Returns, for each element of `types` in order, whether the final include
// list contains a header with that extension.
// Errors of the directory iteration propagate as exceptions.
std::array<bool, 2> generate_allinclude_info(fs::path directory)
{
	if(debug >= 3) std::cerr << "Processing " << directory << '\n';
	include_info info{std::move(directory)};

	auto is_directory = [](auto&& x) { return x.is_directory(); };
	auto is_header = [](auto&& x)
	{
		if(find(types, x.path().extension()) == types.end())
			return false;

		// Entry paths are spelled after the target (absolute when it defaults to
		// the current path), exclusions are spelled however the user typed them,
		// so compare both as absolute, lexically normalized paths.
		// The non-throwing overload is used since exclusions may be empty (blank
		// line in the configuration file), in which case the path is kept as is.
		const auto normalized = [](const fs::path& path)
		{
			std::error_code error;
			const auto resolved = fs::absolute(path, error);
			return (error ? path : resolved).lexically_normal();
		};

		const auto header = normalized(x.path());
		return std::none_of(excluded_headers.begin(), excluded_headers.end(),
			[&](const fs::path& excluded) { return normalized(excluded) == header; });
	};

	// vv COMMENTS vv making_a_case vs_views pp_in_out
	// keep only directories and headers, directories first, headers after the partition point
	const auto headers_offset = copy(fs::directory_iterator(info.directory),
		out_filter(disjunction_f(is_directory, is_header),
		out_partition(is_directory, offset_iterator(info.includes),
		out_transform(&fs::directory_entry::path, // vv COMMENTS vv implicit_path
		offset_expander(info.includes)
	)))).out.partition_point;
	auto headers_begin = headers_offset.base(); // dreaded iterator incompatibility

	if(debug >= 3) std::cerr << "Filtering directories with extensions" << '\n';
	// ignore directories with extensions
	// ideally we would just assume no extension for directories, but it's awkward with std filesystem and why would you even have a directory with dots
	// partition rather than remove_if: the ignored directories are printed
	// below, and remove_if would leave them in a moved-from state
	const auto dirs_end = std::partition(info.includes.begin(), headers_begin,
		[](const auto& dir) { return not dir.has_extension(); });
	if(debug && dirs_end != headers_begin)
	{
		std::cerr << "WARNING: Ignoring:" << '\n';
		std::copy(dirs_end, headers_begin, std::ostream_iterator<fs::path>(std::cerr, "\n"));
	}

	if(debug >= 3) std::cerr << "Sorting by name and extension" << '\n';
	std::sort(info.includes.begin(), dirs_end);
	std::sort(headers_begin, info.includes.end(), transform_arg{[](const auto& a)
		{ return std::make_tuple(a.stem(), a.extension()); }});

	if(debug >= 3) std::cerr << "Filtering headers with subdirectory names" << '\n';
	// headers minus directories, compared by stem, written back in place
	auto headers_end = set_difference<ignore_count>(
		headers_begin, info.includes.end(), info.includes.begin(), dirs_end, headers_begin,
		transform_arg{&fs::path::stem}
	);

	if(debug >= 4)
	{
		if(info.includes.begin() != dirs_end)
		{
			std::cerr << "Subdirectories: " << '\n';
			std::copy(info.includes.begin(), dirs_end, std::ostream_iterator<fs::path>(std::cerr, "\n"));
			std::cerr << '\n';
		}

		if(headers_begin != headers_end)
		{
			std::cerr << "Includes: " << '\n';
			std::copy(headers_begin, headers_end, std::ostream_iterator<fs::path>(std::cerr, "\n"));
			std::cerr << '\n';
		}
	}

	if(dirs_end != info.includes.begin())
	{
		if(debug >= 3) std::cerr << "Adding appropriate headers for subdirectories" << '\n';
		// offsets instead of plain iterators, since the output grows the very
		// vector that is being read
		headers_end = std::transform(offset_iterator(info.includes), offset_iterator(info.includes, dirs_end),
			out_flatten_tuple(out_optional(offset_expander(info.includes, headers_end))),
			[](auto&& dir)
			{
				// one optional header per type: "<dir><extension>" if the
				// subdirectory has includes of that type
				return transform([&dir](bool has_include, auto&& extension)
					-> std::optional<fs::path>
				{
					if(has_include)
						return dir.replace_extension(extension);
					return std::nullopt;
				}, generate_allinclude_info(dir), types);
			}
		).out.out.out.base();
		headers_begin = headers_offset.base(); // un-invalidate
	}

	if(debug >= 3) std::cerr << "Cleaning up include list" << '\n';
	// move the headers to the front and cut off everything else
	headers_end = headers_end - headers_begin + info.includes.begin();
	std::rotate(info.includes.begin(), headers_begin, info.includes.end());
	info.includes.erase(headers_end, info.includes.end());

	if(debug >= 4)
	{
		std::cerr << "Final Includes: " << '\n';
		copy(info.includes, std::ostream_iterator<fs::path>(std::cerr, "\n"));
		std::cerr << '\n';
	}

	if(debug >= 3) std::cerr << "Checking include types" << '\n';
	transform(types, [&](auto&& type)
		{ return find_if(info.includes, [&type](auto&& x)
			{ return x.extension() == type; }) != info.includes.end(); },
		info.has_includes.begin()
	);

	if(debug >= 4)
	{
		std::cerr << "Has includes: ";
		copy(info.has_includes, std::ostream_iterator<bool>(std::cerr, " "));
		std::cerr << '\n';
	}

	if(debug >= 3) std::cerr << "Done processing: " << info.directory << '\n';

	return allincludes.emplace_back(std::move(info)).has_includes;
}

// Brings the allinclude headers on disk in line with `allincludes`.
//
// For every recorded directory and every header type in `types` this
//  - builds the wanted content: the signature followed by #include lines for
//    the directory's includes, with paths relative to the directory's parent;
//    the content stays empty when the directory has no includes of that type,
//  - opens the directory path with its extension replaced by the header type
//    (the file is created only when there is content),
//  - checks the file on disk and then rewrites it, removes it (empty content)
//    or leaves it alone.
//
// force: when true, existing non-empty files that do not start with the
//        signature are overwritten/removed instead of skipped with a warning.
//
// Throws std::runtime_error when a file fails to open (for a reason other than
// not existing) or to be read, seeked, written or resized.
void write_allincludes(bool force = false)
{
	// TODO: assert only h hpp extensions
	// TODO: for includes assert sorted by stem and extension
	// TODO: assert has_includes is correct??
	// stage 1: per directory, a tuple of (path, open result, wanted content) for each header type
	transform(allincludes, [](auto&& info)
		{
			if(debug >= 2) std::cerr << "Generating headers for: " << info.directory << '\n';
			return transform( [&info](auto iteration_state, bool has_include, auto&& extension)
			{
				std::string content;
				if(has_include)
					// NOTE(review): presumably pick_unique groups neighbouring includes
					// with equal stems and emits one element per group - confirm
					// against simple::support
					pick_unique(info.includes,
						transform_arg{&fs::path::stem, std::equal_to<>{}},
						[iteration_state](auto range)
						{
							// first of the group for the first header type, last of the
							// group for the second one
							static_assert(iteration_state.index() < 2);
							if constexpr (iteration_state.index() == 0)
								return range.begin();
							else
								return std::prev(range.end());
						},
						out_transform( [&info](auto&& path)
							{ return "#include \"" + fs::relative(path,
								info.directory.parent_path()).native() +
							"\"\n"; },
						// the content starts with the signature, the include lines get appended to it
						out_accumulate(content += signature
					)));


				// the header sits next to the directory and is named after it
				auto allinc = info.directory;
				allinc.replace_extension(extension);


				if(debug >= 2 && not std::empty(content)) std::cerr << "Header: " << allinc << '\n';
				if(debug >= 4 && not std::empty(content)) std::cerr << "Content: " << '\n' << content << '\n';

				// TODO: a conditional function that can merge the two variants
				using read_write_handle_t = meta::prepend_t<open_error_t, interface<operation::read | operation::write>>;
				// only create the file if there is something to write into it
				auto io_handle = std::empty(content)
					? to_<read_write_handle_t>(open<mode::read, mode::write>(allinc)) // technically only need mode::read here, but prefer common interface
					: to_<read_write_handle_t>(open<mode::read, mode::write, mode::create>(allinc))
				;

				return std::tuple{allinc, std::move(io_handle), content};
			}, info.has_includes, types);
		},
		out_flatten_tuple(
		// stage 2: decide whether the file on disk needs to be touched at all
		out_filter([force, read_buffer = std::array<std::byte, 4096>{}](const auto& file) mutable // TODO: will the copy of the read_buffer array be ellided?
		{
			auto& [path, io_handle, content] = file;
			if(debug >= 2) std::cerr << "Checking header on disk: " << path << '\n';

			return std::visit(overload{
				[](no_such_entity) { if(debug >= 2) std::cerr << "Does not exist." << '\n'; return false; }, // content is also empty cause otherwise we create
				// TODO: c++20 can just capture structured binding without this silliness
				[&,&path=path,&content=content](const interface<operation::read | operation::write>& read_handle)
				{
					read_iterator io_begin(read_handle, as_byte_range(read_buffer)), io_end;

					if(debug >= 2) std::cerr << "Checking signature" << '\n';
					auto signature_data = as_byte_view(signature);
					bool empty = io_begin == io_end;
					auto [data_it, io_it] = support::mismatch(signature_data.begin(), signature_data.end(), std::move(io_begin), io_end);
					// can't tell if there was already an empty file or if we created it, so got to consider that valid,
					// ideally open would tell if it created the file or not, but that's too much to ask from a C API
					if(data_it != signature_data.end() && not empty)
					{
						if(debug) std::cerr << "WARNING: " << path << " is not an allinclude file."  << '\n';
						if(force)
						{
							if(debug) std::cerr << "Overwriting." << '\n';
							return true; // overwrite
						}
						else
						{
							if(debug) std::cerr << "Skipping, use force (-f) to overwrite or exclude it (-x file)." << '\n';
							return false; // skip
						}
					}

					if(debug >= 2) std::cerr << "Checking content" << '\n';
					bool should_write = false;
					if(std::empty(content))
					{
						should_write = true; // got to delete
					}
					else
					{
						auto content_data = as_byte_view(content);
						// this is `should_write = not equal(data.begin(),data.end(),io_it,io_end)`, except need to get io iterator out to check for errors below
						// the signature at the start of the content was already compared above, so skip it here
						auto mismatch = std::tie(data_it, io_it) = support::mismatch(content_data.begin() + signature.size(), content_data.end(), std::move(io_it), io_end);
						should_write = not (mismatch == std::forward_as_tuple(content_data.end(), io_end)); // got to update, if found a mismatch
					}

					// NOTE(review): a non-zero result index is taken to mean that reading failed - confirm against simple::io
					if(io_it.result_index() != 0)
						// TODO: still not perfect, cause have no object to get_message from
						// theoretically since the type indicates a specific error it shouldn't be a problem,
						// but will need a bit of an overhaul of errors in simple::io to support that
						// either way we abandon ship, cause something must have gone totally wrong
						throw std::runtime_error("Failed to read "s + path.native());

					return should_write;

				},
				[&path=path](error e) { throw std::runtime_error("Failed to open "s + path.native() + "\n Error: " + get_message(e)); return false; }
			}, io_handle);
		},
		// stage 3: remove or rewrite the files that got through the filter
		out_invoke([](const auto& file)
		{
			auto& [path, io_handle, content] = file;

			if(std::empty(content))
			{
				if(debug) std::cerr << "Removing: " << path << '\n';
				fs::remove(path);
			}
			else
			{
				if(debug) std::cerr << "Writing: " << path << '\n';
				// the filter only lets files with an open handle through
				auto& write_handle = get<0>(io_handle);
				// the check above has read from the file, so go back to the start
				if(seek(write_handle, 0).index() != 0)
					throw std::runtime_error("Failed to seek "s + path.native());
				auto content_data = as_byte_view(content);
				std::visit(overload{
					// TODO: c++20 can just capture structured binding without this silliness
					[&path=path](error) { throw std::runtime_error("Failed to write "s + path.native()); },
					[&,&path=path](const std::byte* written) { if(written != content_data.end()) throw std::runtime_error("Not all data written to "s + path.native()); }
				}, write(write_handle, content_data));
				// cut off what is left of a longer previous version
				if(resize(write_handle, content.size()).index() != 0)
					throw std::runtime_error("Failed to resize "s + path.native());
			}
		}
	))));
}

int main(int argc, char const* argv[]) try
{
	assert(std::is_sorted(types.begin(), types.end()));

	assert(argc > 0); ++argv; --argc; // stupid first arg is stupid

	if(std::find(argv, argv+argc, "-h"s) != argv+argc)
	{
		get<0>(write(standard_error, as_byte_view("allinclude [OPTIONS]\n"
			"OPTIONS:\n"
			"  -h       show this help\n"
			"  -t dir   target directory (default: current path)\n"
			"  -f       force overwrite\n"
			"  -x file  exclude a header file\n"
			"  -q       no logs\n"
			"  -v       some logs\n"
			"  -vv      a lot of logs\n"
			"  -vvv     spam logs\n"
			"\n"
			"CONFIGURATION FILES:\n"
			"  .allinclude_exclude    newline separated list of header files to exclude\n"
		)));
		return 0;
	}

	debug = (std::find(argv, argv+argc, "-vvv"s) != argv+argc) ? 4 :
		(std::find(argv, argv+argc, "-vv"s) != argv+argc) ? 3:
		(std::find(argv, argv+argc, "-v"s) != argv+argc) ? 2 :
		(std::find(argv, argv+argc, "-q"s) != argv+argc) ? 0 :
		1;

	if(debug >= 3) std::cerr << "Reading target argument" << '\n';
	auto target_arg = std::find(argv, argv+argc, "-t"s);
	fs::path target = target_arg < (argv+argc-1) ? *(target_arg + 1) : fs::current_path();
	if(!fs::is_directory(target))
	{
		std::cerr << target << " is not a directory!" << '\n';
		return -1;
	}

	if(debug >= 3) std::cerr << "Reading exclude arguments" << '\n';
	{
		auto x = std::find(argv, argv+argc, "-x"s);
		while(x < argc+argv-1)
		{
			++x;
			excluded_headers.emplace_back(*x);
			x = std::find(x+1, argv+argc, "-x"s);
		}
	}

	if(debug >= 3) std::cerr << "Reading exclude configuration file: " << exclude_filename << '\n';
	if(fs::exists(exclude_filename))
	{
		auto exclude_file = file::ropen(exclude_filename);
		std::string line;
		while(std::getline(exclude_file, line))
			excluded_headers.emplace_back(line);
	}

	if(debug >= 2)
	{
		std::cerr << "Target: " << target << '\n';
		std::cerr << "Debug level: " << debug << '\n';
		if(not std::empty(excluded_headers))
		{
			std::cerr << "Excluding: " << '\n';
			copy(excluded_headers, std::ostream_iterator<fs::path>(std::cerr, "\n"));
			std::cerr << '\n';
		}
	}

	if(debug >= 2) std::cerr << "Analyzing directory structure" << '\n';
	generate_allinclude_info(target);
	if(debug >= 2) std::cerr << "Writing include files" << '\n';
	write_allincludes(std::find(argv, argv+argc, "-f"s) != argv+argc);
}
catch(const std::exception& ex)
{
	if(errno)
		std::perror("Error: ");

	std::cerr << "Exception: " << ex.what() << '\n';

	throw;
}

// ^^ COMMENTS ^^
//
// implicit_path: woopsies, didn't know entry implicitly converts to path, ha
// ha, stupid stupid me, how shameful, oh my, I guess I gotta remove this line
// now... no! transform stays! implicit conversion should go! >:( let me see
// that defect report now :V
//
// making_a_case:
// this abomination replaces the following "simple" loop
// offset_iterator headers_offset(includes);
// for(auto&& entry : fs::directory_iterator(info.directory))
// {
//  // filter
//  if(is_directory(entry) || is_header(entry))
//  {
//   // transform + expand
// 	 includes.push_back(entry.path());
//
//   // partition
// 	 if(is_directory(entry))
//    std::iter_swap(headers_offset++, includes.end() - 1);
//  }
// }
// what's the point you would ask... well, the abomination reads better, you
// can take in what's happening in a more modular sequential way, it's as if
// the list of directory entries were filtered, then partitioned, then
// transformed, then pushed into the vector, while in reality it's an
// amalgamated mess that you can see in the for loop - the filter is a block,
// so have to follow brackets and indentation (simple enough in this case, but
// you still have to do it), next comes the push_back with a rather easy to
// miss transform tucked in, and then bam! a partition (which without the
// comments would need to be identified first) using information from entry
// prior to transformation...  also it's apparent that the headers_offset is
// the partition point returned by this algorithm, while with the for loop it's
// a mutable state that you also need to track
// is it easier to write? no, and that's fine. first of all you need to
// identify these algorithms, then you need to know what's available in your
// toolbox, or potentially add new tools, then you need to deal with function
// object boilerplate, then you need to close all those parens at the end (you
// can go nuts overloading a binary operator to not have to do that one simple
// thing, but is it worth it?), then you need to dig into the resulting
// iterator to get whatever you need out of it, but that's all fine, cause it's
// even harder to make non obvious mistakes there, both at first and afterwards
// every time this is revisited
// is it zero cost theoretically? offset_expander is an overkill (unavoidable
// branch on proxy assignment), but something lighter could be used here - an
// iterator that always pushes back when assigned and always returns the last
// element when read... call it just plain expander I guess... or back_inserter
// if we're being bold... should also give lower/upper_bound_inserter a try,
// something I wanted since forever, but never knew how to go about, so I would
// say yes it is theoretically zero cost compared to the naive loop
// looking at the loop and forgetting about the algorithmic pipeline you are
// tempted to "optimize" it... checking is_directory twice, feels bad man, and
// those swap shenanigans, we just want directories first and headers last,
// right?
// int headers_offset = 0;
// for(auto&& entry : fs::directory_iterator(info.directory))
// {
//  if(is_directory(entry))
//  {
// 	 includes.insert(includes.begin() + headers_offset, entry.path());
// 	 ++headers_offset;
// 	}
// 	else if(is_header(entry))
//  {
// 	 includes.push_back(entry.path());
//  }
// }
// then you might even think that insert is a problem, and replace vector with
// deque... sad sad story... and there are people who think that this is
// actually better than having to learn algorithms... cause you know we gotta
// hire sum javascript ninjas and have them be "productive" from day one...
//
// vs_views: this kind of output iterator chaining maps to loop body much more
// directly than the standard (or ranges v3) views, which are more like
// manipulating the loop statement itself and can often be hard to reason about
// without going into implementation details, and also can't cover this
// particular use case afaik
//
// pp_in_out:
// i want my pp in 'n out
